/* CPYHDR { */
/*
 * This file is part of the 'iit-rbd' library.
 * Copyright © 2015 2016, Marco Frigerio (marco.frigerio@iit.it)
 *
 * See the LICENSE file for more information.
 */
/* } CPYHDR */

#ifndef IIT_RBD_ROBCOGEN_COMMON_EXPRESSIONS_
#define IIT_RBD_ROBCOGEN_COMMON_EXPRESSIONS_


#include "rbd.h"
#include "InertiaMatrix.h"
#include "TransformsBase.h"
#include "types.h"
#include "internals.h"


namespace iit {
namespace rbd {

/**
 * \defgroup robcogen_commons RobCoGen commons
 *
 * Functions implementing some algebraic expressions recurring in the code
 * generated by RobCoGen.
 *
 * This implementation is usually a bit more efficient than the straightforward
 * way of doing the same thing, with matrix algebra.
 *
 * These functions were written specifically to be used within the code
 * generated by RobCoGen; they expect parameters which have to satisfy maybe more
 * constraints than one would expect from a public API. Similarly, no
 * consistency checks are performed (e.g. verify that the matrix is 6x6).
 * @{
 */


#define DScalar typename Derived::Scalar


/**
 * Calculates the bias force acting on a rigid body with the given velocity and
 * inertial properties.
 *
 * This function provides a hopefully efficient implementation of the term
 * \f[ v \times^{*} I v \f]
 * as described in Featherstone's book about rigid body dynamics algorithms.
 *
 * \param v the spatial velocity of the body
 * \param I the spatial inertia tensor of the body
 * \return the spatial force acting on the body due to its own velocity; from
 *     another point of view, the force required not to produce any acceleration.
 */
template <typename Derived>
Force<DScalar> vxIv(const Velocity<DScalar>& v, const MatrixBase<Derived>& I)
{
    typedef DScalar Scalar;
    // This is a "manual" implementation of the matrix product, as the obvious
    //  implementation (plain matrix-matrix-vector multiplication) would not
    //  exploit the (known) sparsity pattern and the fact that the first and last
    //  term ( v cross* and v) have the same coefficients.

    // Pre-compute some recurring quantities.
    Scalar wx2 = v(AX)*v(AX);
    Scalar wy2 = v(AY)*v(AY);
    Scalar wz2 = v(AZ)*v(AZ);

    Scalar v_AXAY = v(AX)*v(AY);
    Scalar v_AXAZ = v(AX)*v(AZ);
    Scalar v_AXLY = v(AX)*v(LY);
    Scalar v_AXLZ = v(AX)*v(LZ);

    Scalar v_AYAZ = v(AY)*v(AZ);
    Scalar v_AYLX = v(AY)*v(LX);
    Scalar v_AYLZ = v(AY)*v(LZ);

    Scalar v_AZLX = v(AZ)*v(LX);
    Scalar v_AZLY = v(AZ)*v(LY);

    Scalar m   = I(LX,LX); // the mass
    // Copy some of the coefficients for more efficient access
    // 'aa' stands for angular-angular, a 3x3 sub-block of the spatial inertia
    internal::SymmMat3x3Coefficients<Scalar> Iaa(
            I(AX,AX), I(AX,AY), I(AX,AZ),
                      I(AY,AY), I(AY,AZ),
                                I(AZ,AZ));
    Scalar cmX = I(AZ, LY);
    Scalar cmY = I(AX, LZ);
    Scalar cmZ = I(AY, LX);

    typename Core<Scalar>::ForceVector ret;
    ret(AX) = cmZ * (v_AXLZ - v_AZLX)
            + cmY * (v_AXLY - v_AYLX)
            + v_AYAZ * (Iaa.ZZ - Iaa.YY)
            - v_AXAZ*Iaa.XY - (wz2 - wy2)*Iaa.YZ + v_AXAY*Iaa.XZ;

    ret(AY) = cmZ * (v_AYLZ - v_AZLY)
            - cmX * (v_AXLY - v_AYLX)
            - v_AXAZ * (Iaa.ZZ - Iaa.XX)
            + v_AYAZ*Iaa.XY  - v_AXAY*Iaa.YZ + (wz2 - wx2)*Iaa.XZ;

    ret(AZ) = cmX * (v_AZLX - v_AXLZ)
            - cmY * (v_AYLZ - v_AZLY)
            + v_AXAY * (Iaa.YY - Iaa.XX)
            + v_AXAZ*Iaa.YZ - Iaa.XZ*v_AYAZ  - (wy2 - wx2)*Iaa.XY;

    ret(LX) =   m * (v_AYLZ - v_AZLY) - cmX * (wz2 + wy2) + v_AXAZ*cmZ + v_AXAY*cmY;
    ret(LY) = - m * (v_AXLZ - v_AZLX) - cmY * (wz2 + wx2) + v_AXAY*cmX + v_AYAZ*cmZ;
    ret(LZ) =   m * (v_AXLY - v_AYLX) - cmZ * (wy2 + wx2) + v_AXAZ*cmX + v_AYAZ*cmY;
    return ret;
}

/**
 * Calculates the bias force in the special case in which the velocity is a pure
 * rotation about the \c z axis.
 * \see  vxIv(const VelocityVector&, const InertiaMatrixDense&)
 *
 * \param omegaz the scalar rotation velocity about the \c z axis
 * \param I the spatial inertia tensor of the rigid body
 * \return see vxIv(const VelocityVector&, const InertiaMatrixDense&)
 */
template <typename Derived>
inline Force<DScalar> vxIv(const DScalar& omegaz, const MatrixBase<Derived>& I)
{
    DScalar wz2 = omegaz*omegaz;
    Force<DScalar> ret;
    ret(AX) = -I(AY,AZ) * wz2;
    ret(AY) =  I(AX,AY) * wz2;
    ret(AZ) =  0;
    ret(LX) =  I(AY,LZ) * wz2;
    ret(LY) =  I(AZ,LX) * wz2;
    ret(LZ) =  0;
    return ret;
}

// very important, un-define the convenience macros to avoid polluting the namespace
#undef DScalar


/**
 * Calculates the spatial cross product matrix, for motion vectors.
 *
 * \param v the spatial velocity defining the cross product
 * \param[out] vx the 6x6 matrix whose coefficients will be set; must be
 *     initialized with zeros (that is, this function does not assign the
 *     zero elements of the cross product matrix).
 * \tparam the scalar type
 */
template<typename S = double>
inline void motionCrossProductMx(const Velocity<S>& v, Mat66<S>& vx)
{
    vx(AY,AZ) = vx(LY,LZ) = - ( vx(AZ,AY) = vx(LZ,LY) = v(AX) );
    vx(AZ,AX) = vx(LZ,LX) = - ( vx(AX,AZ) = vx(LX,LZ) = v(AY) );
    vx(AX,AY) = vx(LX,LY) = - ( vx(AY,AX) = vx(LY,LX) = v(AZ) );

    vx(LY,AZ) = - ( vx(LZ,AY) = v(LX) );
    vx(LZ,AX) = - ( vx(LX,AZ) = v(LY) );
    vx(LX,AY) = - ( vx(LY,AX) = v(LZ) );
}
/**
 * Calculates the spatial cross product matrix, for force vectors.
 *
 * \param v the spatial velocity defining the cross product
 * \param[out] vx the 6x6 matrix whose coefficients will be set; must be
 *     initialized with zeros (that is, this function does not assign the
 *     zero elements of the cross product matrix).
 */
template<typename S = double>
inline void forceCrossProductMx(const Velocity<S>& v, Mat66<S>& vx)
{
    vx(AY,AZ) = vx(LY,LZ) = - ( vx(AZ,AY) = vx(LZ,LY) = v(AX) );
    vx(AZ,AX) = vx(LZ,LX) = - ( vx(AX,AZ) = vx(LX,LZ) = v(AY) );
    vx(AX,AY) = vx(LX,LY) = - ( vx(AY,AX) = vx(LY,LX) = v(AZ) );

    vx(AY,LZ) = - ( vx(AZ,LY) = v(LX) );
    vx(AZ,LX) = - ( vx(AX,LZ) = v(LY) );
    vx(AX,LY) = - ( vx(AY,LX) = v(LZ) );
}


template<typename S = double>
void fillInertia(S mass, const Vec3<S>& com, const internal::SymmMat3x3Coefficients<S>& I3x3,
        InertiaMat<S>& I)
{
    I(AX,AX) = I3x3.XX;
    I(AY,AY) = I3x3.YY;
    I(AZ,AZ) = I3x3.ZZ;
    I(AY,AX) = I(AX,AY) = I3x3.XY;
    I(AZ,AX) = I(AX,AZ) = I3x3.XZ;
    I(AZ,AY) = I(AY,AZ) = I3x3.YZ;

    I(AX,LY) = I(LY,AX) = - ( I(AY,LX) = I(LX,AY) = mass*com(Z) );
    I(AZ,LX) = I(LX,AZ) = - ( I(AX,LZ) = I(LZ,AX) = mass*com(Y) );
    I(AY,LZ) = I(LZ,AY) = - ( I(AZ,LY) = I(LY,AZ) = mass*com(X) );

    I(LX,LX) = I(LY,LY) = I(LZ,LZ) = mass;
}


/**
 * Roto-translate a spatial inertia tensor.
 *
 * This function is basically an optimized implementation of the formula to do
 * a coordinate transform for a spatial inertia:
 * \f[ XF \cdot I \cdot XF^T \f]
 * (see equation 2.66 of the RBDA book)
 *
 * \param I_A the spatial inertia tensor of a rigid body, expressed in reference frame \c A
 * \param XF a spatial coordinate transform for force vectors, in the form \c B_XF_A
 *        (that is, mapping forces from A to B coordinates)
 * \param[out] I_B the output tensor, expressing the same inertial properties with
 *       coordinates of frame \c B
 */
template<typename Scalar = double>
void transformInertia(
        const InertiaMat<Scalar>& I_A,
        const Mat66<Scalar>&      XF,
              InertiaMat<Scalar>& I_B)
{
    // The coefficients of the 3x3 rotation matrix
    internal::Mat3x3Coefficients<Scalar> E(
            XF(AX,AX),XF(AX,AY),XF(AX,AZ),
            XF(AY,AX),XF(AY,AY),XF(AY,AZ),
            XF(AZ,AX),XF(AZ,AY),XF(AZ,AZ));
    Scalar mass = I_A.getMass();

    // The relative position 'r' of the origin of frame B wrt A (in A coordinates).
    //   The vector is basically "reconstructed" from the matrix XF, which has
    // this form:
    //      | E   -E rx |
    //      | 0    E    |
    // where 'rx' is the cross product matrix. The strategy is to compute
    //  E^T (-E rx) = -rx  , and then get the coordinates of 'r' from 'rx'.
    // This code is a manual implementation of the E transpose multiplication,
    // limited to the elements of interest.
    //    Note that this is necessary because, currently, spatial transforms do
    // not carry explicitly the information about the translation vector.
    Scalar rx = E.XY * XF(AX,LZ) + E.YY * XF(AY,LZ) + E.ZY * XF(AZ,LZ);
    Scalar ry = E.XZ * XF(AX,LX) + E.YZ * XF(AY,LX) + E.ZZ * XF(AZ,LX);
    Scalar rz = E.XX * XF(AX,LY) + E.YX * XF(AY,LY) + E.ZX * XF(AZ,LY);

    // The relative position of the CoM wrt A (in A coordinates)
    Scalar comAx = I_A(AZ,LY)/mass;
    Scalar comAy = I_A(AX,LZ)/mass;
    Scalar comAz = I_A(AY,LX)/mass;

    // The relative position of the CoM wrt B (in A coordinates)
    typename Core<Scalar>::Vector3 p(comAx-rx, comAy-ry, comAz-rz);

    // Pre-computation of some recurring squares
    Scalar cx2 = comAx*comAx;
    Scalar cy2 = comAy*comAy;
    Scalar cz2 = comAz*comAz;
    Scalar px2 = p(X)*p(X);
    Scalar py2 = p(Y)*p(Y);
    Scalar pz2 = p(Z)*p(Z);

    // Manual implementation of the parallel axis theorem, to translate the
    //  3x3 tensor from frame A to frame B :
    internal::SymmMat3x3Coefficients<Scalar> I_translated(
            I_A(AX,AX), I_A(AX,AY), I_A(AX,AZ)
                      , I_A(AY,AY), I_A(AY,AZ)
                                  , I_A(AZ,AZ));
    I_translated.XX += mass*( py2 + pz2   - cy2 - cz2 );
    I_translated.YY += mass*( px2 + pz2   - cx2 - cz2 );
    I_translated.ZZ += mass*( px2 + py2   - cx2 - cy2 );
    I_translated.XY += mass*( comAx*comAy - p(X)*p(Y) );
    I_translated.XZ += mass*( comAx*comAz - p(X)*p(Z) );
    I_translated.YZ += mass*( comAy*comAz - p(Y)*p(Z) );

    // Rotate the 3x3 tensor
    internal::SymmMat3x3Coefficients<Scalar> I3x3_B;
    internal::rot_symmetric_EAET<Scalar>(E, I_translated, I3x3_B);
    // Rotate the CoM vector
    typename Core<Scalar>::Vector3 p_B(
            E.XX*p(X) + E.XY*p(Y) + E.XZ*p(Z),
            E.YX*p(X) + E.YY*p(Y) + E.YZ*p(Z),
            E.ZX*p(X) + E.ZY*p(Y) + E.ZZ*p(Z)
    );

    // Finally copy the coefficients into the destination matrix
    fillInertia(mass, p_B, I3x3_B, I_B);
}

///@} // end of implicit in-group elements [Doxygen]



#define block33 template block<3,3>

/**
 * \name Articulated inertia - Coordinate transform
 *
 * These functions perform the coordinate transform of a spatial articulated
 * inertia, in the special case of a mass-less handle (see chapter 7 of the RBDA
 * book, §7.2.2, equation 7.23)
 *
 * Two specialized functions are available for the two cases of revolute and
 * prismatic joint (which connects the subtree with the mass-less handle), since
 * the inertia exhibits two different sparsity patterns.
 *
 * \param Ia_A the articulated inertia in A coordinates
 * \param XM a spatial coordinate transform for motion vectors, in the form \c A_XM_B
 *        (that is, mapping forces from A to B coordinates)
 * \param[out] Ia_B_const the same articulated inertia, but expressed in B
 *       coordinates. Note that the constness is casted away (trick required
 *       with Eigen)
 */
///@{
/** \ingroup robcogen_commons */
template <typename D1, typename D2, typename D3>
void ctransform_Ia_revolute(
        const MatrixBase<D1>& Ia_A,
        const MatrixBase<D2>& XM,
        const MatrixBase<D3>& Ia_B_const)
{
    typedef typename D1::Scalar Scalar; // we assume D1 D2 D3 use the same scalar type!

    MatrixBase<D3>& Ia_B = const_cast<MatrixBase<D3>&>(Ia_B_const);
    // Get the coefficients of the 3x3 rotation matrix B_E_A
    //  It is the transpose of the angular-angular block of the spatial transform
    internal::Mat3x3Coefficients<Scalar> E(
            XM(AX,AX),XM(AY,AX),XM(AZ,AX),
            XM(AX,AY),XM(AY,AY),XM(AZ,AY),
            XM(AX,AZ),XM(AY,AZ),XM(AZ,AZ));

    // Recover the translation vector. See comment in method 'transformInertia'
    Scalar rx = E.XY * XM(LZ,AX) + E.YY * XM(LZ,AY) + E.ZY * XM(LZ,AZ);
    Scalar ry = E.XZ * XM(LX,AX) + E.YZ * XM(LX,AY) + E.ZZ * XM(LX,AZ);
    Scalar rz = E.XX * XM(LY,AX) + E.YX * XM(LY,AY) + E.ZX * XM(LY,AZ);

    // Angular-angular 3x3 sub-block:

    internal::SymmMat3x3Coefficients<Scalar> aux1, aux2;

    // ## compute  I + (Crx + Crx^T)  :

    // Remember that, for a revolute joint, the structure of the matrix is as
    //  follows (note the zeros):
    //    [ia1,  ia2,  0, ia4,  ia5,  ia6 ]
    //    [ia2,  ia7,  0, ia8,  ia9,  ia10]
    //    [0  ,    0,  0,   0,    0,    0 ]
    //    [ia4,  ia8,  0, ia11, ia12, ia13]
    //    [ia5,  ia9,  0, ia12, ia14, ia15]
    //    [ia6, ia10,  0, ia13, ia15, ia16]

    // copying the coefficients results in slightly fewer invocations of the
    //  operator(int,int), in the rest of the function
    internal::Mat3x3Coefficients<Scalar> C(
            Ia_A(AX,LX),Ia_A(AX,LY),Ia_A(AX,LZ),
            Ia_A(AY,LX),Ia_A(AY,LY),Ia_A(AY,LZ),
            Scalar(0), Scalar(0), Scalar(0));

    aux1.XX = Ia_A(AX,AX) + 2*C.XY*rz - 2*C.XZ*ry;
    aux1.YY = Ia_A(AY,AY) + 2*C.YZ*rx - 2*C.YX*rz;
    aux1.ZZ = Scalar(0);
    aux1.XY = Ia_A(AX,AY) + C.YY*rz - C.XX*rz - C.YZ*ry + C.XZ*rx;
    aux1.XZ = C.XX*ry - C.XY*rx;
    aux1.YZ = C.YX*ry - C.YY*rx;

    // ## compute (- rx M)  (note the minus)
    const internal::SymmMat3x3Coefficients<Scalar> M(
            Ia_A(LX,LX),Ia_A(LX,LY),Ia_A(LX,LZ)
                       ,Ia_A(LY,LY),Ia_A(LY,LZ)
                                   ,Ia_A(LZ,LZ));
    internal::Mat3x3Coefficients<Scalar> rxM;
    rxM.XX = rz*M.XY - ry*M.XZ;
    rxM.YY = rx*M.YZ - rz*M.XY;
    rxM.ZZ = ry*M.XZ - rx*M.YZ;
    rxM.XY = rz*M.YY - ry*M.YZ;
    rxM.YX = rx*M.XZ - rz*M.XX;
    rxM.XZ = rz*M.YZ - ry*M.ZZ;
    rxM.ZX = ry*M.XX - rx*M.XY;
    rxM.YZ = rx*M.ZZ - rz*M.XZ;
    rxM.ZY = ry*M.XY - rx*M.YY;

    // ## compute  (I + (Crx + Crx^T))  -  (rxM) rx
    aux1.XX += rxM.XY*rz - rxM.XZ*ry;
    aux1.YY += rxM.YZ*rx - rxM.YX*rz;
    aux1.ZZ += rxM.ZX*ry - rxM.ZY*rx;
    aux1.XY += rxM.XZ*rx - rxM.XX*rz;
    aux1.XZ += rxM.XX*ry - rxM.XY*rx;
    aux1.YZ += rxM.YX*ry - rxM.YY*rx;

    // ## compute  E ( .. ) E^T
    internal::rot_symmetric_EAET<Scalar>(E, aux1, aux2);

    // Copy the result, angular-angular block of the output
     Ia_B(AX,AX) = aux2.XX;
     Ia_B(AY,AY) = aux2.YY;
     Ia_B(AZ,AZ) = aux2.ZZ;
     Ia_B(AY,AX) = Ia_B(AX,AY) = aux2.XY;
     Ia_B(AZ,AX) = Ia_B(AX,AZ) = aux2.XZ;
     Ia_B(AZ,AY) = Ia_B(AY,AZ) = aux2.YZ;
    // aux2.write(Ia_B.block33(AX,AX)); // this is be equivalent, but it uses a block expression


    // Angular-linear block (and linear-angular block)
    // Calculate E ( C -rxM ) E^T
    //  - note that 'rxM' already contains the coefficients of  (- rx * M)
    //  - for a revolute joint, the last line of C is zero
    rxM.XX += C.XX;
    rxM.XY += C.XY;
    rxM.XZ += C.XZ;
    rxM.YX += C.YX;
    rxM.YY += C.YY;
    rxM.YZ += C.YZ;

    internal::Mat3x3Coefficients<Scalar> aux3;
    internal::rot_EAET<Scalar>(E, rxM, aux3);
    // copy the result, also to the symmetric 3x3 block
    Ia_B(LX,AX) = Ia_B(AX,LX) = aux3.XX;
    Ia_B(LY,AX) = Ia_B(AX,LY) = aux3.XY;
    Ia_B(LZ,AX) = Ia_B(AX,LZ) = aux3.XZ;
    Ia_B(LX,AY) = Ia_B(AY,LX) = aux3.YX;
    Ia_B(LY,AY) = Ia_B(AY,LY) = aux3.YY;
    Ia_B(LZ,AY) = Ia_B(AY,LZ) = aux3.YZ;
    Ia_B(LX,AZ) = Ia_B(AZ,LX) = aux3.ZX;
    Ia_B(LY,AZ) = Ia_B(AZ,LY) = aux3.ZY;
    Ia_B(LZ,AZ) = Ia_B(AZ,LZ) = aux3.ZZ;

    // Linear-linear block
    internal::rot_symmetric_EAET<Scalar>(E, M, aux1);
    Ia_B(LX,LX) = aux1.XX;
    Ia_B(LY,LY) = aux1.YY;
    Ia_B(LZ,LZ) = aux1.ZZ;
    Ia_B(LY,LX) = Ia_B(LX,LY) = aux1.XY;
    Ia_B(LZ,LX) = Ia_B(LX,LZ) = aux1.XZ;
    Ia_B(LZ,LY) = Ia_B(LY,LZ) = aux1.YZ;
    //aux1.write(Ia_B.block33(LX,LX)); // this is be equivalent, but it uses a block expression
}

/** \ingroup robcogen_commons */
template <typename D1, typename D2, typename D3>
void ctransform_Ia_prismatic(
        const MatrixBase<D1>& Ia_A,
        const MatrixBase<D2>& XM,
        const MatrixBase<D3>& Ia_B_const)
{
    typedef typename D1::Scalar Scalar; // we assume D1 D2 D3 use the same scalar type!

    MatrixBase<D3>& Ia_B = const_cast<MatrixBase<D3>&>(Ia_B_const);
    // Get the coefficients of the 3x3 rotation matrix B_E_A
    //  It is the transpose of the angular-angular block of the spatial transform
    internal::Mat3x3Coefficients<Scalar> E(
            XM(AX,AX),XM(AY,AX),XM(AZ,AX),
            XM(AX,AY),XM(AY,AY),XM(AZ,AY),
            XM(AX,AZ),XM(AY,AZ),XM(AZ,AZ));

    // Recover the translation vector. See comment in method 'transformInertia'
    Scalar rx = E.XY * XM(LZ,AX) + E.YY * XM(LZ,AY) + E.ZY * XM(LZ,AZ);
    Scalar ry = E.XZ * XM(LX,AX) + E.YZ * XM(LX,AY) + E.ZZ * XM(LX,AZ);
    Scalar rz = E.XX * XM(LY,AX) + E.YX * XM(LY,AY) + E.ZX * XM(LY,AZ);

    // Angular-angular 3x3 sub-block:

    internal::SymmMat3x3Coefficients<Scalar> aux1, aux2;

    // Remember that, for a prismatic joint, the structure of the matrix is as
    //  follows (note the zeros):

    //    [ia1,  ia2,  ia3, |  ia4,  ia5,   0  ]
    //    [ia2,  ia6,  ia7, |  ia8,  ia9,   0  ]
    //    [ia3,  ia7,  ia10,|  ia11, ia12,  0  ]
    //     -----------------|-----------------
    //    [ia4,  ia8,  ia11,| ia13, ia14,   0  ]
    //    [ia5,  ia9,  ia12,| ia14, ia15,   0  ]
    //    [0  ,    0,    0, |  0,     0,    0  ]

    //    [  I   |  C  ]
    //      -----|-----
    //    [  C^T |  M  ]

    // copying the coefficients results in slightly fewer invocations of the
    //  operator(int,int), in the rest of the function
    internal::Mat3x3Coefficients<Scalar> C(
            Ia_A(AX,LX), Ia_A(AX,LY), 0,
            Ia_A(AY,LX), Ia_A(AY,LY), 0,
            Ia_A(AZ,LX), Ia_A(AZ,LY), 0);

    // ## compute  I + (Crx + Crx^T)  :
    aux1.XX = Ia_A(AX,AX) + 2*C.XY*rz;
    aux1.YY = Ia_A(AY,AY) - 2*C.YX*rz;
    aux1.ZZ = Ia_A(AZ,AZ) + 2*C.ZX*ry - 2*C.ZY*rx;
    aux1.XY = Ia_A(AX,AY) + (C.YY - C.XX)*rz;
    aux1.XZ = Ia_A(AX,AZ) + C.ZY*rz + C.XX*ry - C.XY*rx;
    aux1.YZ = Ia_A(AY,AZ) - C.ZX*rz + C.YX*ry - C.YY*rx;


    const internal::SymmMat3x3Coefficients<Scalar> M(
            Ia_A(LX,LX),Ia_A(LX,LY), 0
                       ,Ia_A(LY,LY), 0
                                   , 0);
    // ## compute the term (- rx M)  (note the minus)
    internal::Mat3x3Coefficients<Scalar> rxM;
    rxM.XX = rz*M.XY;
    rxM.XY = rz*M.YY;
    rxM.YX = - rz*M.XX;
    rxM.YY = - rz*M.XY;
    rxM.ZX = ry*M.XX - rx*M.XY;
    rxM.ZY = ry*M.XY - rx*M.YY;
    rxM.XZ = 0.0;
    rxM.YZ = 0.0;
    rxM.ZZ = 0.0;

    // ## compute  (I + (Crx + Crx^T))  -  (rxM) rx
    aux1.XX += rxM.XY*rz;
    aux1.YY += - rxM.YX*rz;
    aux1.ZZ += rxM.ZX*ry - rxM.ZY*rx;
    aux1.XY += - rxM.XX*rz;
    aux1.XZ += rxM.XX*ry - rxM.XY*rx;
    aux1.YZ += rxM.YX*ry - rxM.YY*rx;

    // ## compute  E ( I + Crx + (Crx)^T - rx M rx ) E^T
    internal::rot_symmetric_EAET<Scalar>(E, aux1, aux2);

    // Copy the result, angular-angular block of the output
    Ia_B(AX,AX) = aux2.XX;
    Ia_B(AY,AY) = aux2.YY;
    Ia_B(AZ,AZ) = aux2.ZZ;
    Ia_B(AY,AX) = Ia_B(AX,AY) = aux2.XY;
    Ia_B(AZ,AX) = Ia_B(AX,AZ) = aux2.XZ;
    Ia_B(AZ,AY) = Ia_B(AY,AZ) = aux2.YZ;
    // aux2.write(Ia_B.block33(AX,AX)); // this is be equivalent, but it uses a block expression


    // Angular-linear block (and linear-angular block)
    // Calculate E ( C -rxM ) E^T
    //  - note that 'rxM' already contains the coefficients of  (- rx * M)
    //  - for a prismatic joint, the last column of C is zero
    rxM.XX += C.XX;
    rxM.XY += C.XY;
    //rxM.XZ stays zero
    rxM.YX += C.YX;
    rxM.YY += C.YY;
    //rxM.YZ stays zero
    rxM.ZX += C.ZX;
    rxM.ZY += C.ZY;

    internal::Mat3x3Coefficients<Scalar> aux3;
    internal::rot_EAET<Scalar>(E, rxM, aux3); // TODO exploit the zeros in rxM
    // copy the result, also to the symmetric 3x3 block
    Ia_B(LX,AX) = Ia_B(AX,LX) = aux3.XX;
    Ia_B(LY,AX) = Ia_B(AX,LY) = aux3.XY;
    Ia_B(LZ,AX) = Ia_B(AX,LZ) = aux3.XZ;
    Ia_B(LX,AY) = Ia_B(AY,LX) = aux3.YX;
    Ia_B(LY,AY) = Ia_B(AY,LY) = aux3.YY;
    Ia_B(LZ,AY) = Ia_B(AY,LZ) = aux3.YZ;
    Ia_B(LX,AZ) = Ia_B(AZ,LX) = aux3.ZX;
    Ia_B(LY,AZ) = Ia_B(AZ,LY) = aux3.ZY;
    Ia_B(LZ,AZ) = Ia_B(AZ,LZ) = aux3.ZZ;

    // Linear-linear block
    internal::rot_symmetric_EAET<Scalar>(E, M, aux1); // TODO exploit the zeros in M
    Ia_B(LX,LX) = aux1.XX;
    Ia_B(LY,LY) = aux1.YY;
    Ia_B(LZ,LZ) = aux1.ZZ;
    Ia_B(LY,LX) = Ia_B(LX,LY) = aux1.XY;
    Ia_B(LZ,LX) = Ia_B(LX,LZ) = aux1.XZ;
    Ia_B(LZ,LY) = Ia_B(LY,LZ) = aux1.YZ;
    //aux1.write(Ia_B.block33(LX,LX)); // this is be equivalent, but it uses a block expression
}
///@}
#undef block33

/**
 * \name Articulated inertia - Computation
 *
 * These functions calculate the spatial articulated inertia of a subtree, in
 * the special case of a mass-less handle (see chapter 7 of the RBDA
 * book, §7.2.2, equation 7.23)
 *
 * Two specialized functions are available for the two cases of revolute and
 * prismatic joint (which connects the subtree with the mass-less handle), since
 * the inertia exhibits two different sparsity patterns.
 *
 * \param IA the regular articulated inertia of some subtree
 * \param U the U term for the current joint (cfr. eq. 7.43 of RBDA)
 * \param D the D term for the current joint (cfr. eq. 7.44 of RBDA)
 * \param[out] Ia_const the articulated inertia for the same subtree, but
 *       propagated across the current joint. The matrix is assumed to be
 *       already initialized with zeros, at least in the row and column which
 *       is known to be zero. In other words, those elements are not computed
 *       nor assigned in this function.
 *       Note that the constness of the argument is
 *       casted away (trick required with Eigen), as this is an output
 *       argument.
 */
///@{
/** \ingroup robcogen_commons */
template <typename D1, typename D2>
void compute_Ia_revolute(
        const MatrixBase<D1>& IA,
        const Vec6<typename D1::Scalar>& U,
        const typename D1::Scalar&       D,
        const MatrixBase<D2>& Ia_const)
{
    typedef typename D1::Scalar Scalar; // we assume D1 D2 use the same scalar type!

    MatrixBase<D2>& Ia = const_cast<MatrixBase<D2>&>(Ia_const);

    Scalar UD_AX = U(AX) / D;
    Scalar UD_AY = U(AY) / D;
    //Scalar UD_AZ = U(AZ) / D;
    Scalar UD_LX = U(LX) / D;
    Scalar UD_LY = U(LY) / D;
    Scalar UD_LZ = U(LZ) / D;

    Ia(AX,AX) = IA(AX,AX) - U(AX)*UD_AX;
    Ia(AX,AY) = Ia(AY,AX) = IA(AX,AY) - U(AX)*UD_AY;
    //Ia(AX,AZ) = Ia(AZ,AX) = 0; // it is assumed to be set already
    Ia(AX,LX) = Ia(LX,AX) = IA(AX,LX) - U(AX)*UD_LX;
    Ia(AX,LY) = Ia(LY,AX) = IA(AX,LY) - U(AX)*UD_LY;
    Ia(AX,LZ) = Ia(LZ,AX) = IA(AX,LZ) - U(AX)*UD_LZ;

    Ia(AY,AY) = IA(AY,AY) - U(AY)*UD_AY;
    //Ia(AY,AZ) = Ia(AZ,AY) = 0; // it is assumed to be set already
    Ia(AY,LX) = Ia(LX,AY) = IA(AY,LX) - U(AY)*UD_LX;
    Ia(AY,LY) = Ia(LY,AY) = IA(AY,LY) - U(AY)*UD_LY;
    Ia(AY,LZ) = Ia(LZ,AY) = IA(AY,LZ) - U(AY)*UD_LZ;

    //The whole row AZ it is assumed to be already set to zero

    Ia(LX,LX) = IA(LX,LX) - U(LX)*UD_LX;
    Ia(LX,LY) = Ia(LY,LX) = IA(LX,LY) - U(LX)*UD_LY;
    Ia(LX,LZ) = Ia(LZ,LX) = IA(LX,LZ) - U(LX)*UD_LZ;

    Ia(LY,LY) = IA(LY,LY) - U(LY)*UD_LY;
    Ia(LY,LZ) = Ia(LZ,LY) = IA(LY,LZ) - U(LY)*UD_LZ;

    Ia(LZ,LZ) = IA(LZ,LZ) - U(LZ)*UD_LZ;
}

/** \ingroup robcogen_commons */
template <typename D1, typename D2>
void compute_Ia_prismatic(
        const MatrixBase<D1>& IA,
        const Vec6<typename D1::Scalar>& U,
        const typename D1::Scalar&       D,
        const MatrixBase<D2>& Ia_const)
{
    typedef typename D1::Scalar Scalar; // we assume D1 D2 use the same scalar type!

    MatrixBase<D2>& Ia = const_cast<MatrixBase<D2>&>(Ia_const);

    Scalar UD_AX = U(AX) / D;
    Scalar UD_AY = U(AY) / D;
    Scalar UD_AZ = U(AZ) / D;
    Scalar UD_LX = U(LX) / D;
    Scalar UD_LY = U(LY) / D;
    //Scalar UD_LZ = U(LZ) / D;

    Ia(AX,AX) = IA(AX,AX) - U(AX)*UD_AX;
    Ia(AX,AY) = Ia(AY,AX) = IA(AX,AY) - U(AX)*UD_AY;
    Ia(AX,AZ) = Ia(AZ,AX) = IA(AX,AZ) - U(AX)*UD_AZ;
    Ia(AX,LX) = Ia(LX,AX) = IA(AX,LX) - U(AX)*UD_LX;
    Ia(AX,LY) = Ia(LY,AX) = IA(AX,LY) - U(AX)*UD_LY;
    //Ia(AX,LZ) = Ia(LZ,AX) = 0; // it is assumed to be set already

    Ia(AY,AY) = IA(AY,AY) - U(AY)*UD_AY;
    Ia(AY,AZ) = Ia(AZ,AY) = IA(AY,AZ) - U(AY)*UD_AZ;
    Ia(AY,LX) = Ia(LX,AY) = IA(AY,LX) - U(AY)*UD_LX;
    Ia(AY,LY) = Ia(LY,AY) = IA(AY,LY) - U(AY)*UD_LY;
    //Ia(AY,LZ) = Ia(LZ,AY) = 0; // it is assumed to be set already

    Ia(AZ,AZ) = IA(AZ,AZ) - U(AZ)*UD_AZ;
    Ia(AZ,LX) = Ia(LX,AZ) = IA(AZ,LX) - U(AZ)*UD_LX;
    Ia(AZ,LY) = Ia(LY,AZ) = IA(AZ,LY) - U(AZ)*UD_LY;
    //Ia(AZ,LZ) = Ia(LZ,AZ) = 0; // it is assumed to be set already

    Ia(LX,LX) = IA(LX,LX) - U(LX)*UD_LX;
    Ia(LX,LY) = Ia(LY,LX) = IA(LX,LY) - U(LX)*UD_LY;
    //Ia(LX,LZ) = Ia(LZ,LX) = 0; // it is assumed to be set already

    Ia(LY,LY) = IA(LY,LY) - U(LY)*UD_LY;
    //Ia(LY,LZ) = Ia(LZ,LY) = 0; // it is assumed to be set already

    //Ia(LZ,LZ) = 0; // it is assumed to be set already
}
///@}




}
}

#endif
